
#Out of sample forecast based on Nel and Righarts (2008)

library(readstata13)
library(tidyverse)
library(fixest)
library(readxl)
library(broom)
library(ggthemes)
library(Zelig)
library(stargazer)
library(latex2exp)
library(dplyr)
library(ggplot2)
library(ROCit)
if(!require('data.table')) {
  install.packages('data.table')
  library('data.table')
}
directory = c("/Users/roniyadlin/Dropbox/Climate conflict")
nd_conflict <- read.dta13("/Users/roniyadlin/Dropbox/Climate conflict/data/clean/climate_conflict_panel_country.dta")
setwd("/Users/roniyadlin/Dropbox/Climate conflict")
#### Load in control variables data (obtained from OWID), and merge to panel
child_mortality <- read_csv("data/raw/controls/child-mortality-igme.csv")  %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 
polity <- read_csv("data/raw/controls/experience-with-democracy-polity.csv") %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 
gdp_pc <- read_csv("data/raw/controls/gdp-per-capita-maddison-2020.csv") %>%
  filter(Year>2000)   %>%
  filter( Year<2021) 
political_regime <- read_csv("data/raw/controls/political-regime-amb-row.csv") %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 
pop_growth <- read_csv("data/raw/controls/population-growth-rates.csv") %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 
population <- read_csv("data/raw/controls/population-since-1800.csv") %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 
brevpeace <- read_csv("data/raw/controls/brevity-of-peace.csv") %>%
  filter(Year>2000)   %>%
  filter(Year<2021) 

#put all data frames into list
df_list <- list(child_mortality, polity, gdp_pc, political_regime, pop_growth, population,brevpeace)      

#merge all data frames together

controls_df <- Reduce(function(x, y) merge(x, y, all=TRUE), df_list)  %>%
  filter_at(vars(Code), all_vars(!is.na(.))) %>%
  rename(iso3=Code, year=Year, infant_mr='Mortality rate, under-5 (per 1,000 live births)', 
         pop='Population (historical estimates)', gdp_pc='GDP per capita', regime_type='regime_amb_row_owid', 
         gdp_growth='Growth rate - Sex: all - Age: all - Variant: estimates')


## Merge
analysis_df <- full_join(controls_df, nd_conflict, by = c("iso3", "year"))

##make controls matrix
allND_pc<-analysis_df$anynatdis/analysis_df$pop
imr_t_1<-analysis_df$infant_mr
imr_sqr_t_1<-imr_t_1*imr_t_1
mixed<-analysis_df$regime_type
gdpgro<-analysis_df$gdp_growth
brevity<-analysis_df$brevpeace
controls<-cbind(allND_pc,imr_t_1,imr_sqr_t_1,mixed,
                gdpgro,brevity)
colnames(controls)<-c("allND_pc","imr_t_1","imr_sq_t_1",
                      "mixedx", "gdpgrox", "brevity")


##Run old regression for beta values:
rep_df <- read.dta13("/Users/roniyadlin/Dropbox/Climate conflict/replication materials/Nel and Righarts ND and VCC final 20 June 2007.dta")
rep_df <- rep_df %>%                           
  group_by(state) %>%
  mutate(imr_t_1 = lag(imr),
         imr_sq_t_1 = lag(imr_sq),
         vccall_tp1 = lead(vccall),
         vccmajor_tp1 = lead(vccmajor)) 
t1_2 <- zelig(vccall ~ allND_pc + imr_t_1 + imr_sq_t_1 + mixed + gdpgro + brevity, 
              model = "relogit", data = rep_df)
coefs2<-coefficients(t1_2)
coefs2<-as.data.frame(coefs2)
beta0<-data.frame(rep(coefs2[1,],times=nrow(controls)))

##Covariance matrix
V<-data.frame(vcov(t1_2))
rownames(coefs2)<-c("Intercept","allND_pc","imr_t_1","imr_sq_t_1",
                    "mixed", "gdpgro", "brevity")

##Forecasting
##Applying regression formula
forecast2<-beta0+controls%*%coefs2[2:7,]
colnames(forecast2)<-c("Civil_War")

##Calculating odds or war using logit equation
odds2<-exp(forecast2)
#max(na.omit(odds2))
#ggplot(odds2, aes(x=Civil_War)) + geom_histogram()

##Calculating probability of war using logit equation
prob2<-1/(1+exp(-forecast2))

##Making correction per Rare Events Logit (King/Zheng 2001)
Cmult<-as.matrix(t(coefs2)) %*% as.matrix(V) %*% as.matrix(coefs2)
C2<-(.5-prob2)*prob2*(1-prob2)*Cmult
prob2corrected<-prob2+C2
pred<-data.frame(analysis_df$Entity,analysis_df$year,prob2corrected)
colnames(pred)<-c("Entity","Year", "Civil_War_prob")

##Determining if regression predicts war 
##Number of predicted wars with different probability thresholds

probvec<-seq(0.05,.5,.05)
probs<-data.frame(rep(0,times=9847))
probs<-sapply(probvec, function(x){
  pred$War = ifelse(pred$Civil_War_prob > x, 1, 0)
  probs<-sum(na.omit(pred$War))
  rm(pred)
  pred<-data.frame(analysis_df$Entity,analysis_df$year,prob2corrected)
  colnames(pred)<-c("Entity","Year", "Civil_War_prob")
  return(probs)
}
)
probdf<-data.frame(probvec,probs)
colnames(probdf)<-c("Prob_Threshold","Num_Wars")


##Prediction using threshold of .5
comppred<-pred
comppred$War = ifelse(comppred$Civil_War_prob > .172, 1, 0)
sum(na.omit(comppred$War))

##Is there an actual war?
totevents<-data.frame(analysis_df$total_events)
totevents$War = ifelse(totevents > 0, 1, 0)
totwars<-sum(na.omit(totevents$War))

##Comparing actual conflict versus predicted
compdf<-data.frame(comppred,totevents$War,analysis_df$latitude,analysis_df$longitude)
colnames(compdf)<-c("Entity","Year","Prob_VCC","Predicted_VCC","Actual_VCC","Lat","Long")
matchdf<-na.omit(data.frame(compdf[compdf$Predicted_VCC==1 & compdf$Actual_VCC==1,]))
matches<-nrow(matchdf)

falsenegdf<-na.omit(data.frame(compdf[compdf$Predicted_VCC==0 & compdf$Actual_VCC==1,]))
falseneg<-nrow(falsenegdf)
falseposdf<-na.omit(data.frame(compdf[compdf$Predicted_VCC==1 & compdf$Actual_VCC==0,]))
falsepos<-nrow(falseposdf)

overall<-data.frame(matches,falseneg,falsepos)
overall[nrow(overall) + 1,] <- c(round(matches/totwars*100,2),round(falseneg/totwars*100,2),round(falsepos/totwars*100,2))
colnames(overall)<-c("Matches","False Negatives","False Positives")
rownames(overall)<-c("Number","Percentage")

table<-overall%>%
  kbl(caption = "Nel and Righarts Out of Sample Prediction Results") %>%
  kable_minimal()
print(xtable(overall, type = "latex"), file = "/Users/roniyadlin/Dropbox/Climate conflict/output/tables/NRtable.tex")

mapmatch<-data.frame(matchdf$Lat,matchdf$Long)
colnames(mapmatch)<-c("Lat","Long")
write.csv(mapmatch,"/Users/roniyadlin/Dropbox/Climate conflict/output/figures/Mapping/match.csv",row.names = FALSE)

##ROC Curve
roc<-roc(compdf$Actual_VCC,compdf$Prob_VCC)
gl <- ggroc(roc, color="red", legacy.axes = TRUE)
glsave<-gl + xlab("FPR") + ylab("TPR") +
  geom_segment(aes(x = 0, xend = 1, y = 0, yend = 1), color="blue", linetype="dashed")
ggsave("/Users/roniyadlin/Dropbox/Climate conflict/output/figures/ROC_NR.png",glsave,device="png")


##DF to plot map
mapdf<-data.frame(compdf,analysis_df$latitude,analysis_df$longitude)
mappredlat<-na.omit(data.frame(mapdf$analysis_df.latitude[mapdf$Predicted_VCC==1]))
mappredlong<-na.omit(data.frame(mapdf$analysis_df.longitude[mapdf$Predicted_VCC==1]))
mappred<-data.frame(mappredlat,mappredlong)
colnames(mappred)<-c("Lat","Long")
write.csv(mappred,"/Users/roniyadlin/Dropbox/Climate conflict/output/figures/Mapping/predict.csv",row.names = FALSE)
mapactlat<-na.omit(data.frame(mapdf$analysis_df.latitude[mapdf$Actual_VCC==1]))
mapactlong<-na.omit(data.frame(mapdf$analysis_df.longitude[mapdf$Actual_VCC==1]))
mapact<-data.frame(mapactlat,mapactlong)
colnames(mapact)<-c("Lat","Long")
write.csv(mapact,"/Users/roniyadlin/Dropbox/Climate conflict/output/figures/Mapping/actual.csv",row.names = FALSE)


